Manuscript draft (google doc) available here: https://docs.google.com/document/d/15joiEhDGsYJTgS0y5KkjgVT_Usv1emeCA80fxc0Odkk/edit?usp=sharing
Site summary data
Biweekly summary of variables
Case Date Imputation. about that..
Model for cases ~ .
Test high vs low traffic as FE in RE model
Finalize fig. 2
Finalize fig. 3
SFig: WW by building
Finish written methods and results
# setup ---------------------------------------------
library(tidyverse)
library(here)
library(ggiraph)
library(patchwork)
library(ggtext)
library(janitor)
ggplot2::theme_set(theme_bw() + theme(
legend.text = element_markdown(family = 'IBM Plex Sans', colour = 'gray30'),
axis.text = element_markdown(family = 'IBM Plex Sans', colour = 'gray30'),
axis.title = element_markdown(family = 'IBM Plex Sans',colour = 'gray30'),
))
hrbrthemes::import_plex_sans()
source(here('R/report_plots.R'))
# Functions -----------------------------------------
convert_cq_to_copies <- function(cq) 10 ** ((40.1 - cq) / 3.23)
# creates yyyy-ww label for grouping data
get_date_week <- function(x){
y <- lubridate::year(x)
w <- lubridate::week(x) |> str_pad(2, 'left', 0)
str_glue("{y}-{w}")
}
# reverses get_date_week to Weds date during week
week_to_date <- function(year_week){
year <- str_extract(year_week, '^....')
d <- str_glue('{year}-01-01') |> as_date()
wk <- str_extract(year_week, '..$') |> as.integer()
ydays <- if_else(
year == '2021',
days(7*wk - 2),
days(7*wk - 3),
)
return(d + ydays)
}
# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
day_of_week <- lubridate::wday(x)
case_when(
day_of_week %in% 1:3 ~ x + 2 - day_of_week,
TRUE ~ x + 5 - day_of_week,
)
}
# dates 2021-2022 with biweekly groups
biweekly_date_sequence <- function(){
tibble(
date = seq(
as_date('2021-01-01'),
as_date('2022-12-31'),
by = 1)
) |>
mutate(biweek = get_date_biweekly(date))
}
# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
ends = lubridate::as_date(c(start, end))
tibble(
date = seq(ends[1], ends[2], by = 1),
date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
date_year = lubridate::year(date),
week = str_glue("{date_year}-{date_week}"),
) |>
group_by(week) |>
summarise(date = mean(date))
}
# convert site1/site2 to hi/low traffic
get_traffic_level <- function(location){
if_else(
str_detect(location, 'Site 1'),
'high traffic',
'low traffic'
)
}
binom_ci <- function(x, n){
Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |>
as_tibble() |>
janitor::clean_names()
}
get_binom_ci <- function(data){
data |>
summarise(
x = sum(pcr_positive == 'Positive'),
n = n(),
binconf = map2(x, n, ~binom_ci(.x, .y))
) |>
unnest(binconf) |>
mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}
# Plot Elements ---------------------------------------
theme_color <- 'cornflowerblue'
# layout x-axis
scale_x_study_dates <- function(){
scale_x_date(
date_breaks = 'month',
date_labels = month.name[c(9:12, 1:5)] |> strtrim(3) |>
paste0(' ', c(rep(2021, 4), rep(2022, 5))),
limits = c(
min(swabs_tidy$date_swab),
max(swabs_tidy$date_swab)
))
}
scale_x_date_month <- function(){
scale_x_date(
date_breaks = 'month',
date_labels = month.name[c(9:12, 1:5)] |> strtrim(1),
limits = c(
min(swabs_tidy$date_swab),
max(swabs_tidy$date_swab)
))
}
# get rid of x-axis for vertically stacked plots
theme_no_x_labels <- function(){
theme(
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank()
)
}
# get rid of y-axis for horizontally stacked plots
theme_no_y_labels <- function(){
theme(
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank()
)
}
# no minor grid and adjust spacing for multipanel
theme_remove_grid <- function(){
theme(
axis.title.y = element_blank(),
panel.grid.minor = element_blank(),
plot.title.position = 'panel',
plot.title = element_markdown(
vjust = -1,
size = 8,
face = 'bold',
family = 'IBM Plex Sans',
colour = 'gray50',
margin = margin(0, 0, 0, 0)),
plot.margin = unit(c(0, 0, 0, 0), 'mm'))
}
# Data ------------------------------------------------
# swab data
swabs <-
read_rds(here('data/cube.rds')) |>
filter(str_detect(site, '^UO_'))
# filtered swabs without controls or sponge samples
swabs_tidy <- swabs |>
filter(!negative_control,
swab_type != 'sponge',
date_swab < '2022-04-27') |>
select(date_swab, site, floor, location, pcr_positive, pcr_ct, co2) |>
mutate(biweek = get_date_biweekly(date_swab))
# lookup table for waste water site names
lookup_ww <- tribble(
~site_abbr, ~site_name,
'UO_AA', 'Annex Residence',
'UO_FA', 'Faculty of Social Sciences',
'UO_FT', 'Friel Residence',
'UO_NA', 'Northern sampling site',
'UO_RGN', 'Roger Guindon Hall',
'UO_ST', 'Southern sampling site',
'UO_TBT', 'Tabaret Hall'
)
# UOttawa waste water data from R. Delatolla to clean
clean_ww_excel <- function(){
readxl::read_excel(here::here('data/ww_university.xlsx')) |>
janitor::clean_names() |>
transmute(
sample_date = as_date(sample_date),
site = source_name,
signal = viral_copies_per_copies_pep_avg
) |>
mutate(
site = str_remove_all(
site, 'Data\\s-\\suOttawa\\s-\\s|.xlsx'
) |>
str_to_upper()
) |>
filter(site != 'UO_RGN', !is.na(signal))
}
clean_ww_excel() |>
write_csv(here::here('data/ww_university_clean.csv'))
rm(clean_ww_excel)
# clean WW data close to study period, no RGN site.
uottawa_ww <-
read_csv(here::here('data/ww_university_clean.csv'),
show_col_types = FALSE) |>
filter(
sample_date > '2021-09-15',
sample_date < '2022-05-05'
) |>
left_join(lookup_ww, by = c('site' = 'site_abbr'))
# ottawa wastewater data: daily means
regional_ww <-
read_rds(here('data/ww_ottawa.rds')) |>
select(sample_date, starts_with('cov_')) |>
pivot_longer(contains('cov_')) |>
mutate(target = str_extract(name, 'cov_n.'),
stat = str_extract(name, 'mean|sd'),
) |>
select(-name) |>
pivot_wider(names_from = stat, values_from = value) |>
mutate(.lower = mean - sd, .upper = mean + sd)
# wifi data from university of ottawa
wifi <-
read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab) - 2,
date <= max(swabs$date_swab) + 2) |>
mutate(biweek = get_date_biweekly(date))
# important dates for context
event_dates <-
tribble(
~event, ~start, ~end,
'Reading Week', '2021-10-24', '2021-10-30',
'Holiday Break', '2021-12-22', '2022-01-04',
'Closure', '2022-01-04', '2022-01-31',
'Closure', '2022-02-16', '2022-02-21',
'Reading Week', '2022-02-20', '2022-02-26',
) |>
mutate(across(start:end, as_date))
# UOttawa cases data
# clean data from: source(here('R/01_impute_missing_case_dates.R'))
cases <-
read_rds(here('data/cases_rule_imputed.rds')) |>
select(case, role, case_date, UC:TBT) |>
mutate(biweek = get_date_biweekly(case_date)) |>
nest(associated_sites = UC:TBT)
total_n_cases_study_period <-
cases |>
filter(case_date > '2021-09-20') |>
nrow()
# Summaries -----------------------------------------
#
positivity_summary <- function(swabs){
swabs |>
summarise(
n = n(),
x = sum(pcr_positive == 'Positive', na.rm = F),
positivity = round(100 * x / n, digits = 1),
.groups = 'drop')
}
co2_summary <- function(swabs){
swabs |>
summarise(
n = n(),
co2_vals = list(co2),
co2_mean = mean(co2, na.rm = T),
.groups = 'drop')
}
# overall summary
swab_summary <- swabs_tidy |> positivity_summary()
# site summary
swab_summary_sites <- swabs_tidy |>
group_by(site) |>
positivity_summary()
# high-low traffic summary
swab_summary_location_traffic <-
swabs_tidy |>
mutate(traffic = get_traffic_level(location)) |>
group_by(traffic) |>
positivity_summary()
## Figure 1: data aggregation --------------------------
# uottawa cases - biweekly counts
cases_biweekly <-
cases |>
select(case, biweek, role) |>
group_by(biweek) |>
summarise(cases = n(),
cases_student = sum(role == 'Student'),
cases_staff = sum(role == 'Employee'),
) |>
right_join(
biweekly_date_sequence() |>
filter(biweek < '2022-02-03') |>
distinct(biweek),
by = 'biweek'
) |>
mutate(across(where(is.integer), ~if_else(is.na(.), 0L, .)))
# swabs biweekly summary
swabs_biweekly <-
swabs_tidy |>
group_by(biweek) |>
positivity_summary() |>
left_join(swabs_tidy |> group_by(biweek) |> co2_summary(),
by = join_by(biweek, n))
# swabs biweekly summary by site
swabs_biweekly_by_site <-
swabs_tidy |>
group_by(site, biweek) |>
positivity_summary() |>
left_join(
swabs_tidy |> group_by(site, biweek) |> co2_summary(),
by = join_by(site, biweek, n)
)
# UOttawa ww biweekly means
uottawa_ww_biweekly <-
uottawa_ww |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(signal = mean(signal, na.rm = T),
n = n())
# daily ww data for study period
regional_ww_daily <-
regional_ww |>
filter(
sample_date >= min(swabs_tidy$date_swab) - 4,
sample_date <= max(swabs_tidy$date_swab) + 4,
) |>
group_by(sample_date) |>
summarise(mean = mean(mean, na.rm = T))
# regional ww biweekly means
regional_ww_biweekly <-
regional_ww_daily |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(mean = mean(mean))
# overall wifi biweekly timeseries
wifi_biweekly <-
wifi |>
group_by(biweek) |>
summarise(
n = n(),
min = min(clients, na.rm = T),
mean = mean(clients, na.rm = T),
max = max(clients, na.rm = T)
)
# per building wifi biweekly timeseries
wifi_biweek_sites <-
wifi |>
group_by(biweek, site) |>
summarise(
n = n(),
min = min(clients, na.rm = T),
mean = mean(clients, na.rm = T),
max = max(clients, na.rm = T),
.groups = 'drop'
)
# Map --------------------------------------------------
swab_coords <- tribble(
~site, ~name, ~lat, ~lng,
'MRT', 'Morrisette Hall (MRT)', 45.4232391, -75.684289,
'MNT', 'Montpetit', 45.4225417102, -75.6826587146,
'90U', '90 University', 45.422425557, -75.68501449,
'DMS', 'Desmarais Bldg.', 45.4238767934, -75.687270754,
'TBT', 'Tabaret Hall', 45.4245094016, -75.6863190018,
'LPR', 'Louis Pasteur Bldg.', 45.42137566861, -75.6802638999,
)
ww_coords <- tribble(
~site, ~name, ~lat, ~lng,
'aa', 'Annex Residence', 45.4267812902, -75.6803440596,
'fa', 'Faculty of Soc. Sci.', 45.4216247, -75.6839038601,
'ft', 'Friel Residence', 45.4304344008, -75.6829076653,
'tbt', 'Tabaret Hall', 45.4245094016, -75.6863190018
# 'na', 'North sampling Site', NA, NA,
# 'st', 'South Sampling Site', NA, NA, # Nobody knows where this is
)
figure_sites_map <-
leaflet::leaflet(swab_coords, width = 600, height = 330) |>
leaflet::addProviderTiles(
leaflet::providers$CartoDB.Positron
) |>
leaflet::setView(lng = -75.6843, lat = 45.4235, zoom = 14) |>
leaflet::addCircleMarkers(
~lng, ~lat,
label = ~name,
popup = ~name,
radius = 2,
color = '#440099'
) |>
leaflet::addLabelOnlyMarkers(
~lng, ~lat,
label = ~site,
labelOptions = leaflet::labelOptions(
noHide = T, direction = 'top', textOnly = T,
)) |>
leaflet::addCircleMarkers(
~lng,
~lat,
label = ~site,
radius = 2,
color = '#440099'
) |>
leaflet::addCircleMarkers(
data = ww_coords,
~lng,
~lat,
label = ~name,
popup = ~name,
radius = 4,
color = '#f96999'
)
# Results ----------------------------------------------------
rs_data <- lst(
swabs_collected = nrow(swabs_tidy),
positivity_ovr = swab_summary$positivity,
positivity_site_min = min(swab_summary_sites$positivity),
positivity_site_max = max(swab_summary_sites$positivity),
)
rs_abstract <- rs_data |>
glue::glue_data(
"Over the 32-week study period, we collected {swabs_collected} swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing.
Overall, {positivity_ovr}% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between {positivity_site_min}% and {positivity_site_max}%.
*Comment on when spikes in cases, swabs, and waste-water occurred….* *Comment on prediction of cases using swabs, ww, or combined approach…*
"
)
# rs_results <- rs_data |> glue::glue_data()
## corr -----
# joined data for pairwise correlation
biweekly_data <- swabs_biweekly |>
transmute(biweek, `Swab PCR` = positivity, `CO2` = co2_mean) |>
left_join(
wifi_biweekly |> transmute(biweek, `Wi-Fi` = mean),
by = 'biweek'
) |>
left_join(
uottawa_ww_biweekly |> transmute(biweek, `University WW` = signal),
by = 'biweek'
) |>
left_join(
regional_ww_biweekly |> transmute(biweek, `Ottawa WW` = mean),
by = 'biweek'
) |>
left_join(
cases_biweekly |> transmute(biweek, Cases = cases),
by = 'biweek'
)
corr_spearman <- biweekly_data |>
select(-biweek) |>
corrr::correlate(use = 'pairwise.complete.obs',
method = 'spearman')
# to get pvalues...
corrtest <-
biweekly_data |>
select(-biweek) |>
psych::corr.test(
use = 'pairwise',
method = 'spearman',
adjust = 'none' # See p.adjust for "holm" > "bonferroni").
)
# spearman r and p-values; upper triangle has adj. correlations
corr_r <- corrtest$r |> round(3)
corr_p <- corrtest$p |> round(3)
# remove lower tri with raw probabilities
corr_r[lower.tri(corr_r, diag = T)] <- NA
corr_p[lower.tri(corr_p, diag = T)] <- NA
corr_tbl_r <- kableExtra::kable(
x = corr_r,
format = 'pipe',
caption = 'Campus-wide, biweekly metrics: Spearman r'
)
corr_tbl_p <- kableExtra::kable(
x = corr_p,
format = 'pipe',
caption = 'Campus-wide, biweekly metrics: p-value on Spearman r'
)
corr_ci <- corrtest |>
pluck('ci') |>
as_tibble() |>
mutate(terms = rownames(corrtest$ci), .before = 1L)
rm(corr_r, corr_p, corrtest)cases ~ swab_pcr + co2 + wifi + {university_ww} + regional_ww + (all interactions...).cases counted only until Feb 2022 (~40% miss).university_ww couple missing dates.wifi occasional missing data.# n=31 biweekly obs with cases
df_mod <- biweekly_data |>
janitor::clean_names() |>
filter(!is.na(cases)) |>
mutate(across(
-c(cases, biweek), ~as.numeric(scale(.))
))
# only 29 complete observations, oof
df_mod_complete_obs <- df_mod |> na.omit()
# missing data
naniar::vis_miss(df_mod) +
labs(subtitle = 'Aggregated TS Missingness')Cases counts seem to follow a negative binomial distribution more than Poisson.
# simulated curves
add_poisson_density_curve <- function(p) {
p + geom_density(
data = tibble(
x = rpois(n = sum(!is.na(df_mod$cases)),
lambda = mean(df_mod$cases, na.rm = T))
),
aes(x), color = 'blue', alpha = 0.25, size = 0.03
)
}
add_nb_density_curve <- function(p) {
p +
geom_density(
data = tibble(x = rnbinom(
size = 1,
n = sum(!is.na(df_mod$cases)),
mu = mean(df_mod$cases, na.rm = T),
)),
aes(x), color = 'blue', alpha = 0.25, linewidth = 0.03
)
}
simulation_plot <- function(plt_fn, title){
p <- df_mod |> ggplot()
walk(seq(100), ~ {p <<- plt_fn(p)})
p + geom_density(aes(cases)) + labs(x = 'Cases', subtitle = title)
}
# Poisson dist
p1 <- simulation_plot(
add_poisson_density_curve,
'Cases vs. Simulated poisson distribution'
)
# NB dist
p2 <- simulation_plot(
add_nb_density_curve,
'Cases vs. Simulated negative binomial distribution'
)
(p1 / p2)rm(p1, p2, add_poisson_density_curve, add_nb_density_curve)Backwards selection arrived at the model with swab_pcr,
wifi, ottawa_ww, and the interaction between
ottawa_ww & swab_pcr.
# poisson model for cases with interactions, n = 29
full_poisson <- glm(
cases ~ . ^ 2,
data = df_mod_complete_obs |> select(-biweek),
family = 'poisson')
# do backward elim
backward_poisson <- step(full_poisson, trace = F)
stopifnot(backward_poisson$converged)
# standardized coefs and 95% CI
backward_poisson |>
broom::tidy(conf.int = T, conf.level = 0.95) |>
mutate(across(where(is.double), ~round(., 4))) |>
kableExtra::kable(
format = 'pipe',
caption = "Poisson model with interactions: Coefficients & 95% CI"
)| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| (Intercept) | 1.1442 | 0.5675 | 2.0162 | 0.0438 | -0.0795 | 2.1958 |
| swab_pcr | 2.3783 | 0.8588 | 2.7694 | 0.0056 | 0.7922 | 4.2157 |
| co2 | 1.9210 | 0.9231 | 2.0811 | 0.0374 | 0.2730 | 3.9313 |
| wi_fi | 3.5750 | 1.1737 | 3.0460 | 0.0023 | 1.5024 | 6.2294 |
| university_ww | 1.2805 | 1.7684 | 0.7241 | 0.4690 | -1.9410 | 5.1481 |
| ottawa_ww | 5.2590 | 1.7983 | 2.9244 | 0.0035 | 1.8781 | 9.0246 |
| swab_pcr:co2 | 9.1549 | 3.6425 | 2.5133 | 0.0120 | 3.0116 | 17.3829 |
| swab_pcr:wi_fi | -6.3519 | 2.6731 | -2.3762 | 0.0175 | -12.4347 | -1.9227 |
| swab_pcr:university_ww | -12.9791 | 5.5541 | -2.3368 | 0.0194 | -25.9487 | -4.2389 |
| co2:wi_fi | -1.5300 | 1.0604 | -1.4429 | 0.1491 | -3.8359 | 0.2999 |
| co2:ottawa_ww | -8.6825 | 3.6457 | -2.3816 | 0.0172 | -16.9094 | -2.6198 |
| wi_fi:ottawa_ww | 8.2488 | 3.0927 | 2.6671 | 0.0077 | 2.8984 | 15.1644 |
| university_ww:ottawa_ww | 17.6551 | 7.1573 | 2.4667 | 0.0136 | 5.9779 | 34.1894 |
# use type III SS tests since swab*ww interaction is significant
car::Anova(backward_poisson, type = 3) |>
rownames_to_column(var = 'Term') |>
as_tibble() |>
kableExtra::kable(
format = 'pipe',
caption = "Type III ANOVA"
)| Term | LR Chisq | Df | Pr(>Chisq) |
|---|---|---|---|
| swab_pcr | 8.9472598 | 1 | 0.0027789 |
| co2 | 5.3453757 | 1 | 0.0207773 |
| wi_fi | 12.9798694 | 1 | 0.0003149 |
| university_ww | 0.5636378 | 1 | 0.4527982 |
| ottawa_ww | 9.5211159 | 1 | 0.0020312 |
| swab_pcr:co2 | 10.5082735 | 1 | 0.0011884 |
| swab_pcr:wi_fi | 9.4989055 | 1 | 0.0020559 |
| swab_pcr:university_ww | 10.9326787 | 1 | 0.0009448 |
| co2:wi_fi | 2.5852371 | 1 | 0.1078643 |
| co2:ottawa_ww | 9.7670970 | 1 | 0.0017766 |
| wi_fi:ottawa_ww | 10.8874803 | 1 | 0.0009682 |
| university_ww:ottawa_ww | 10.9797961 | 1 | 0.0009211 |
# test model assumption of equidispersion
# p > 0.05 means no significant overdispersion
AER::dispersiontest(backward_poisson)##
## Overdispersion test
##
## data: backward_poisson
## z = -2.4413, p-value = 0.9927
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 0.6427776
summary(backward_poisson)##
## Call:
## glm(formula = cases ~ swab_pcr + co2 + wi_fi + university_ww +
## ottawa_ww + swab_pcr:co2 + swab_pcr:wi_fi + swab_pcr:university_ww +
## co2:wi_fi + co2:ottawa_ww + wi_fi:ottawa_ww + university_ww:ottawa_ww,
## family = "poisson", data = select(df_mod_complete_obs, -biweek))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.19665 -0.51663 -0.15131 0.05176 1.21404
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1442 0.5675 2.016 0.04378 *
## swab_pcr 2.3783 0.8588 2.769 0.00562 **
## co2 1.9210 0.9231 2.081 0.03743 *
## wi_fi 3.5750 1.1737 3.046 0.00232 **
## university_ww 1.2805 1.7684 0.724 0.46902
## ottawa_ww 5.2590 1.7983 2.924 0.00345 **
## swab_pcr:co2 9.1549 3.6425 2.513 0.01196 *
## swab_pcr:wi_fi -6.3519 2.6731 -2.376 0.01749 *
## swab_pcr:university_ww -12.9791 5.5541 -2.337 0.01945 *
## co2:wi_fi -1.5300 1.0604 -1.443 0.14905
## co2:ottawa_ww -8.6825 3.6457 -2.382 0.01724 *
## wi_fi:ottawa_ww 8.2488 3.0927 2.667 0.00765 **
## university_ww:ottawa_ww 17.6551 7.1573 2.467 0.01363 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 176.454 on 28 degrees of freedom
## Residual deviance: 13.215 on 16 degrees of freedom
## AIC: 88.782
##
## Number of Fisher Scoring iterations: 6
plot(backward_poisson)hist(backward_poisson$residuals)rm(full_poisson)Poisson model chosen by backward elimination with no interactions identifies swab positivity, Ottawa WW signal, university WW, and wifi as significant predictors. CO2 is the only main effect dropped from the full model during backward elimination
# backwards select with n=30, no university ww
me_poisson <- glm(
cases ~ .,
data = df_mod_complete_obs |> select(-biweek),
family = 'poisson'
)
backward_poisson_main_only <- step(me_poisson, trace = T)## Start: AIC=93.38
## cases ~ swab_pcr + co2 + wi_fi + university_ww + ottawa_ww
##
## Df Deviance AIC
## - co2 1 31.964 91.531
## <none> 31.810 93.377
## - wi_fi 1 36.465 96.032
## - swab_pcr 1 38.125 97.692
## - university_ww 1 38.473 98.041
## - ottawa_ww 1 46.845 106.413
##
## Step: AIC=91.53
## cases ~ swab_pcr + wi_fi + university_ww + ottawa_ww
##
## Df Deviance AIC
## <none> 31.964 91.531
## - wi_fi 1 36.714 94.281
## - swab_pcr 1 38.137 95.705
## - university_ww 1 38.601 96.169
## - ottawa_ww 1 47.733 105.301
stopifnot(backward_poisson_main_only$converged)
# nearly significant overdispersion when only main effects included
AER::dispersiontest(backward_poisson_main_only)##
## Overdispersion test
##
## data: backward_poisson_main_only
## z = 0.55444, p-value = 0.2896
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 1.111653
# regression coefs and 95%CI
backward_poisson_main_only |>
broom::tidy(conf.int = T, conf.level = 0.95) |>
mutate(across(where(is.double), ~round(., 3))) |>
kableExtra::kable(
format = 'pipe',
caption = "Main Effects Only: Coefficients & 95% CI"
)| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| (Intercept) | 0.273 | 0.183 | 1.486 | 0.137 | -0.112 | 0.610 |
| swab_pcr | 0.471 | 0.187 | 2.520 | 0.012 | 0.101 | 0.836 |
| wi_fi | 0.669 | 0.310 | 2.162 | 0.031 | 0.067 | 1.282 |
| university_ww | 0.238 | 0.089 | 2.681 | 0.007 | 0.059 | 0.412 |
| ottawa_ww | 1.020 | 0.306 | 3.331 | 0.001 | 0.471 | 1.674 |
# type II SS test
car::Anova(backward_poisson_main_only, type = 2) |>
rownames_to_column(var = 'Term') |>
as_tibble() |>
kableExtra::kable(
format = 'pipe',
caption = "Type II ANOVA"
)| Term | LR Chisq | Df | Pr(>Chisq) |
|---|---|---|---|
| swab_pcr | 6.173043 | 1 | 0.0129711 |
| wi_fi | 4.749727 | 1 | 0.0293029 |
| university_ww | 6.637193 | 1 | 0.0099871 |
| ottawa_ww | 15.769336 | 1 | 0.0000716 |
summary(backward_poisson_main_only)##
## Call:
## glm(formula = cases ~ swab_pcr + wi_fi + university_ww + ottawa_ww,
## family = "poisson", data = select(df_mod_complete_obs, -biweek))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.744 -1.113 -0.234 0.556 2.062
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.27253 0.18338 1.486 0.137243
## swab_pcr 0.47095 0.18685 2.520 0.011721 *
## wi_fi 0.66901 0.30951 2.162 0.030656 *
## university_ww 0.23791 0.08875 2.681 0.007351 **
## ottawa_ww 1.02005 0.30623 3.331 0.000865 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 176.454 on 28 degrees of freedom
## Residual deviance: 31.964 on 24 degrees of freedom
## AIC: 91.531
##
## Number of Fisher Scoring iterations: 5
plot(backward_poisson_main_only)hist(backward_poisson_main_only$residuals)rm(me_poisson)swab_summary_location_traffic |>
set_names(c('Traffic level', 'N', 'PCR-Positive', '%')) |>
kableExtra::kable(
format = 'pipe',
caption = 'Positivity by sample location traffic-level'
)| Traffic level | N | PCR-Positive | % |
|---|---|---|---|
| high traffic | 280 | 40 | 14.3 |
| low traffic | 274 | 32 | 11.7 |
To evaluate whether high traffic locations are different from low traffic sites, in terms of swab-PCR positivity, we fit a mixed model with random intercepts for different sites. There appears to be little difference (15% vs 12% overall) and this is confirmed by our mixed model with traffic level as a fixed effect (OR 0.71; 95% CI 0.82-2.2).
traffic_swabs <-
swabs_tidy |>
mutate(
traffic = get_traffic_level(location) |>
str_remove('\\straffic') |>
factor(levels = c('low', 'high'))
) |>
select(site, traffic, pcr_positive)
mixed_model <-
lme4::glmer(pcr_positive ~ traffic + (1 | site), data = traffic_swabs,
family = 'binomial')
mixed_model |>
broom.mixed::tidy(conf.int = T, exponentiate = T) |>
mutate(across(where(is.double), ~round(., 4))) |>
kableExtra::kable(
format = 'pipe',
caption = 'Traffic level - Random intercepts model'
)| effect | group | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 0.1060 | 0.0377 | -6.3105 | 0.0000 | 0.0528 | 0.2128 |
| fixed | NA | traffichigh | 1.2800 | 0.3341 | 0.9457 | 0.3443 | 0.7674 | 2.1349 |
| ran_pars | site | sd__(Intercept) | 0.7082 | NA | NA | NA | NA | NA |
study_period_weeks <- crossing(
date = as_date(as_date('2021-09-21'):as_date('2022-01-31')),
site = c('90U', 'DMS', 'LPR', 'MNT', 'MRT', 'TBT')
) |>
mutate(
week = get_date_week(date),
redate = week_to_date(week)) |>
distinct(site, week)
# group swabs by week & site
per_bldg_swabs <-
swabs_tidy |>
filter(date_swab < '2022-02-02') |>
mutate(
site = str_remove(site, 'UO_'),
week = get_date_week(date_swab) |> as.character(),
copies = convert_cq_to_copies(pcr_ct),
copies_plus_one = if_else(
is.na(copies), 1, copies + 1
)
) |>
group_by(site, week) |>
summarise(
copies_plus_one = 10 ** mean(log10(copies_plus_one),
na.rm = T),
positivity = sum(pcr_positive == 'Positive') / n(),
.groups = 'drop')
# 72 cases associated with study bldgs (at least one)
per_bldg_cases <-
cases |>
filter(case_date > '2021-09-20') |>
unnest(associated_sites) |>
pivot_longer(UC:TBT, names_to = 'site') |>
filter(value) |>
select(-value, -biweek, -case) |>
mutate(
site = str_replace(site, 'UC', '90U'),
week = get_date_week(case_date)) |>
group_by(week, site) |>
summarise(cases = n(), .groups = 'drop')
per_bldg_df <- per_bldg_swabs |>
left_join(per_bldg_cases, by = join_by(site, week)) |>
mutate(cases = if_else(is.na(cases), 0, cases))
rm(study_period_weeks, per_bldg_cases, per_bldg_swabs)Poisson GLM w random intercepts: Cases ~ positivity + (1|site)
per_bldg_model <- lme4::glmer(
cases ~ positivity + (1|site),
data = per_bldg_df,
family = 'poisson'
)
broom.mixed::glance(per_bldg_model)## # A tibble: 1 × 7
## nobs sigma logLik AIC BIC deviance df.residual
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 90 1 -64.4 135. 142. 82.9 87
# Wald CIs
broom.mixed::tidy(per_bldg_model, conf.int = T, conf.level = 0.95)## # A tibble: 3 × 9
## effect group term estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) -1.99 0.397 -5.01 5.38e- 7 -2.77 -1.21
## 2 fixed <NA> positivity 3.20 0.496 6.47 1.00e-10 2.23 4.18
## 3 ran_pars site sd__(Interce… 0.622 NA NA NA NA NA
## # … with abbreviated variable names ¹estimate, ²std.error, ³statistic,
## # ⁴conf.low, ⁵conf.high
points_df <- crossing(
positivity = seq(0, 1, 0.1),
site = unique(per_bldg_df$site)
)
points_df |>
mutate(
pred = predict(
type = 'response',
object = per_bldg_model,
newdata = points_df
)) |>
ggplot(aes(positivity, pred, color = site)) +
geom_smooth(se = F, linewidth = 0.25, alpha = 1) +
geom_jitter(data = per_bldg_df,
alpha = 0.75, shape = 1, size = 0.85,
width = 0.01, height = 0.01,
aes(positivity, cases)) +
labs(x = 'Swab positivity', y = 'Cases', color = 'Site')broom.mixed::tidy(per_bldg_model, 'ran_vals',
conf.int = T, conf.level = 0.95) |>
ggplot(aes(y = level,
x = estimate,
xmin = conf.low,
xmax = conf.high)) +
geom_pointrange()per_bldg_model_copies <- lme4::glmer(
cases ~ log10(copies_plus_one) + (1|site),
data = per_bldg_df,
family = 'poisson'
)
broom.mixed::glance(per_bldg_model_copies)## # A tibble: 1 × 7
## nobs sigma logLik AIC BIC deviance df.residual
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 90 1 -69.0 144. 151. 91.9 87
# Wald CIs
broom.mixed::tidy(per_bldg_model_copies,
conf.int = T,
conf.level = 0.95)## # A tibble: 3 × 9
## effect group term estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) -1.72 0.369 -4.65 3.40e-6 -2.44 -0.992
## 2 fixed <NA> log10(copies_… 2.86 0.468 6.11 1.01e-9 1.94 3.78
## 3 ran_pars site sd__(Intercep… 0.616 NA NA NA NA NA
## # … with abbreviated variable names ¹estimate, ²std.error, ³statistic,
## # ⁴conf.low, ⁵conf.high
points_df <- crossing(
copies_plus_one = 10 ** seq(0, 1.13, 0.0001),
site = unique(per_bldg_df$site)
)
points_df |>
mutate(
pred = predict(
type = 'response',
object = per_bldg_model_copies,
newdata = points_df
)) |>
ggplot(aes(copies_plus_one, pred, color = site)) +
geom_smooth(
se = F, linewidth = 0.25, alpha = 1,
method = 'gam',
formula = y ~ s(x, bs = "cs")
) +
geom_jitter(data = per_bldg_df,
alpha = 0.75, shape = 1, size = 0.85,
width = 0.01, height = 0.01,
aes(copies_plus_one, cases)) +
labs(x = 'Swab: copies+1', y = 'Cases', color = 'Site') +
scale_x_log10()broom.mixed::tidy(per_bldg_model_copies, 'ran_vals',
conf.int = T, conf.level = 0.95) |>
ggplot(aes(y = level,
x = estimate,
xmin = conf.low,
xmax = conf.high)) +
geom_pointrange()# fig1 -----
fig1_timeline <-
event_dates |>
ggplot(aes(x = start, xend = end, y = event, yend = event)) +
geom_segment(linewidth = 4, lineend = 'round',
color = theme_color,
alpha = 0.3) +
geom_segment(linewidth = 1, lineend = 'butt',
color = theme_color,
alpha = 0.9) +
geom_point(aes(x = end), color = theme_color, size = 1.6) +
geom_point(color = theme_color, size = 1.6) +
scale_x_study_dates() +
labs(title = 'Timeline') +
theme_no_x_labels() +
theme_remove_grid()
fig1_cases <-
cases_biweekly |>
# adjust bar widths manually
mutate(biweek = if_else(wday(biweek) == 5, biweek + 0.75, biweek - 0.75)) |>
select(biweek, cases_student, cases_staff) |>
pivot_longer(-biweek) |>
mutate(name = str_remove(name, 'cases_') |> str_to_title()) |>
ggplot() +
geom_vline(
data = tibble(x = as_date('2022-02-03')),
aes(xintercept = x),
lty = 2,
color = 'gray'
) +
geom_text(
data = tibble(
x = as_date('2022-02-05'),
y = 18,
label = 'End of data'
),
aes(x, y, label = label),
size = 2,
hjust = 0
) +
geom_col(
aes(biweek, value, fill = name),
linewidth = 0.15,
width = 3.5,
color = 'gray80',
position = position_stack(),
na.rm = T
) +
labs(fill = NULL, title = 'UOttawa COVID-19 Cases') +
scale_fill_carto_d() +
scale_x_study_dates() +
theme_no_x_labels() +
theme_remove_grid() +
theme(
legend.position = c(0.9, 0.4),
legend.key.size = unit(2, 'mm'),
legend.background = element_rect(color = 'grey80')
)
fig1_swabs <-
swabs_biweekly |>
ggplot(aes(biweek, positivity)) +
geom_smooth(
linewidth = 0.5,
alpha = 0.2,
fill = theme_color,
span = 0.4,
method = 'loess',
formula = 'y ~ x',
na.rm = T) +
geom_point(color = theme_color,
alpha = 0.85,
shape = 1,
size = 1,
na.rm = T) +
scale_x_study_dates() +
labs(title = 'Swab Positivity (%)') +
theme_no_x_labels() +
theme_remove_grid()
fig1_co2 <-
ggplot(swabs_biweekly, aes(biweek, co2_mean)) +
geom_point(color = theme_color, shape = 1, alpha = 0.85, size = 1,
na.rm = T) +
geom_smooth(fill = theme_color, alpha = 0.2, size = 0.5,
na.rm = T) +
labs(x = NULL, y = NULL, title = 'CO~2~ (ppm)') +
scale_x_study_dates() +
theme_no_x_labels() +
theme_remove_grid()
fig1_wifi <-
wifi_biweekly |>
ggplot(aes(biweek, mean)) +
geom_smooth(span = 0.5,
size = 0.5,
alpha = 0.2,
fill = theme_color,
na.rm = T) +
geom_point(color = theme_color, alpha = 0.85, shape = 1, size = 1,
na.rm = T) +
labs(title = "Wi-Fi Connections") +
scale_x_study_dates() +
theme_no_x_labels() +
theme_remove_grid()
fig1_uo_ww <-
ggplot(uottawa_ww_biweekly,
aes(biweek, signal)) +
geom_smooth(span = 0.4, alpha = 0.2, size = 0.5,
fill = theme_color, na.rm = T) +
geom_point(
# aes(size = n),
alpha = 0.85, shape = 1, size = 1,
color = theme_color,
na.rm = T,
show.legend = F) +
labs(
title = 'University Waste-Water',
) +
scale_x_study_dates() +
scale_size(range = c(1, 3)) +
theme_no_x_labels() +
theme_remove_grid()
fig1_ott_ww <-
ggplot(regional_ww_daily) +
geom_smooth(aes(sample_date, mean),
method = 'loess', formula = 'y ~ x',
span = 0.2, size = 0.5, alpha = 0.2,
na.rm = T,
fill = theme_color) +
geom_point(data = regional_ww_biweekly,
aes(biweek, mean),
na.rm = T,
alpha = 0.85, shape = 1, size = 1,
color = theme_color) +
labs(x = 'Date',
title = 'Regional Waste-Water',
) +
scale_x_study_dates() +
theme_remove_grid() +
theme(axis.title.x = ggtext::element_markdown(lineheight = 1),
axis.text.x = ggtext::element_markdown(size = 6.7))
figure_1 <- wrap_plots(
fig1_timeline, fig1_cases, fig1_swabs, fig1_co2,
fig1_wifi, fig1_uo_ww, fig1_ott_ww
) +
plot_layout(ncol = 1, nrow = 7, heights = c(1, rep(1.5, 6)))
rm(fig1_timeline, fig1_swabs, fig1_co2, fig1_wifi, fig1_uo_ww, fig1_ott_ww, fig1_cases)
## Figure 2: Corr heatmap -----------------------------------
figure_2 <- corrr::autoplot(corr_spearman, triangular = "lower",
barheight = 10) +
geom_text(aes(label = r |> round(2)), size = 3.5)## Figure 3: Per-building res ---------------------------------
#### thematic elements and axis scales -----
# limits are based on the min/max of data to keep scales consistent across all plots of a given variable
scale_y_copies <- function() scale_y_log10(limits = c(1, 300))
scale_y_cases <- function() {
scale_y_continuous(breaks = seq(0, 5, 2), limits = c(0,5))
}
scale_y_wifi <- function() scale_y_continuous(limits = c(0, 1000),
n.breaks = 3)
scale_color_traffic <- function(){
scale_color_carto_d(
palette = 2,
breaks = c('High', 'Low'),
labels = c('High', 'Low'),
drop = F
)
}
theme_axes_lb_no_margin <- function(){
theme_remove_grid() +
theme(
axis.title.y = element_markdown(size = 8),
axis.text.y = element_markdown(size = 7),
axis.line = element_line(colour = "gray30", linewidth = rel(0.5)),
legend.title = element_markdown(size = 8),
legend.text = element_markdown(size = 7),
panel.border = element_blank(),
plot.margin = margin(1,0,0,0, 'mm')
)
}
theme_axes_lb <- function(){
theme_remove_grid() +
theme(
axis.title.y = element_markdown(size = 8),
axis.text.y = element_markdown(size = 7),
axis.line = element_line(colour = "gray30", linewidth = rel(0.5)),
legend.title = element_markdown(size = 8),
legend.text = element_markdown(size = 7),
panel.border = element_blank(),
plot.margin = margin(1,2,2,2, 'mm')
)
}
theme_axis_90U <- function(){
theme_remove_grid() +
theme(
panel.border = element_blank(),
axis.text.y = element_markdown(size = 7),
axis.line = element_line(colour = "gray30", linewidth = rel(0.5)),
legend.title = element_markdown(size = 8),
legend.text = element_markdown(size = 7),
plot.margin = margin(1,2,2,2, 'mm')
)
}
remove_y_axis_if_not_leftmost <- function(p, site){
if (site %in% c('DMS', 'TBT', 'LPR')) return(p)
p + theme_no_y_labels()
}
geom_event <- function(site, ymin){
geom_rect(
data = event_dates |> mutate(site = site),
aes(
xmin = start,
xmax = end,
ymin = ymin,
ymax = Inf,
fill = event
),
alpha = .25,
)
}
scale_fill_event <- function(){
rcartocolor::scale_fill_carto_d(palette = 'Safe',
name = 'Event')
}
#### individual plot functions -----
# plot cases timeseries for site
plot_site_cases <- function(data, site){
p <- data |>
ggplot() +
geom_col(
aes(biweek,
cases,
fill = factor(role, levels = c('Employee', 'Student'))),
na.rm = T,
linewidth = .1, width = 2.5, color = 'grey70') +
geom_rect(
data = tibble(
x = as_date('2022-02-02'),
xmax = as_date(Inf),
y = 0,
ymax = Inf
),
aes(xmin = x,
ymin = y,
xmax = xmax,
ymax = ymax),
alpha = .1
) +
scale_x_date_month() +
scale_y_cases() +
scale_fill_carto_d(
palette = 1,
breaks = c('Employee', 'Student'),
labels = c('Employee', 'Student'),
drop = FALSE,
name = 'Cases: Role') +
labs(y = 'Cases', x = NULL, title = site) +
theme_no_x_labels() +
theme_axes_lb_no_margin()
remove_y_axis_if_not_leftmost(p, site)
}
# plot swabs copies timeseries for site
plot_site_swabs <- function(data, site){
data_early <- data |> filter(date_swab < '2022-01-01')
data_late <- data |> filter(date_swab > '2022-01-01')
p <- data_early |>
ggplot() +
geom_event(site, ymin = 1) +
geom_path(
aes(date_swab, copies_plus_one, color = traffic),
alpha = 0.8, linewidth = 0.6) +
geom_point(
aes(date_swab, copies_plus_one, color = traffic),
size = 0.6, alpha = 0.8) +
geom_path(
data = data_late,
aes(date_swab, copies_plus_one, color = traffic),
alpha = 0.8, linewidth = 0.6) +
geom_point(
data = data_late,
aes(date_swab, copies_plus_one, color = traffic),
size = 0.6, alpha = 0.8) +
scale_x_date_month() +
scale_y_copies() +
scale_color_traffic() +
rcartocolor::scale_fill_carto_d(palette = 'Safe') +
labs(x = NULL,
y = 'Copies + 1',
fill = 'Event',
color = 'Traffic Level') +
theme_axes_lb_no_margin() +
theme(axis.title.y = element_markdown(),
axis.text.x = element_markdown(size = 6),
legend.text = element_markdown())
if (site == '90U') {
p <- p + theme_axis_90U()
} else {
p <- p + theme_no_x_labels()
}
remove_y_axis_if_not_leftmost(p, site)
}
# plot wifi timeseries for site
plot_site_wifi <- function(data, site){
if (is.null(data)) return(patchwork::plot_spacer())
p <- data |>
ggplot() +
geom_event(site, ymin = 1) +
geom_path(
aes(date, clients),
alpha = 0.5,
na.rm = T) +
scale_x_date_month() +
scale_y_wifi() +
rcartocolor::scale_fill_carto_d(palette = 'Safe') +
labs(y = 'Wifi', x = NULL, fill = 'Event') +
theme_axes_lb() +
theme(
axis.title.y = element_markdown(),
axis.text.x = element_markdown(size = 6))
remove_y_axis_if_not_leftmost(p, site)
}
## Plot datasets ====
fig3_cases <-
cases |>
unnest(associated_sites) |>
pivot_longer(UC:TBT, names_to = 'site') |>
mutate(site = if_else(site == 'UC', '90U', site)) |>
filter(value,
case_date > min(swabs_tidy$date_swab - 5),
case_date < max(swabs_tidy$date_swab + 5),
) |>
group_by(site, biweek, role) |>
summarise(cases = n(),
dates = list(case_date),
.groups = 'drop') |>
group_nest(site, .key = "cases")
fig3_swabs <-
swabs_tidy |>
mutate(
site = str_remove(site, 'UO_'),
traffic = get_traffic_level(location),
copies = convert_cq_to_copies(pcr_ct),
copies_plus_one = if_else(is.na(copies), 1, copies + 1),
) |>
select(site, traffic, date_swab, copies_plus_one) |>
group_nest(site, .key = "swabs")
fig3_wifi <-
wifi |>
mutate(site = str_remove(site, 'UO_')) |>
select(date, site, clients) |>
group_nest(site, .key = "wifi")
#### data handling and making building panels -----
# combine nested df
fig3_df_nest <- fig3_cases |>
left_join(fig3_swabs, by = join_by(site)) |>
left_join(fig3_wifi, by = join_by(site))
# map data to plots, combine into panel for each site
fig3_plts <-
fig3_df_nest |>
mutate(site = factor(site, levels = c(
'DMS','MRT', 'TBT', 'MNT', 'LPR', '90U'
))) |>
arrange(site) |>
transmute(
site = as.character(site),
plt_cases = map2(cases, site, plot_site_cases),
plt_swabs = map2(swabs, site, plot_site_swabs),
plt_wifi = map2(wifi, site, plot_site_wifi),
) |>
mutate(panel = pmap(
.l = list(site, plt_cases, plt_swabs, plt_wifi),
.f = function(site,plt_cases, plt_swabs, plt_wifi) {
p <- patchwork::wrap_plots(
plt_cases, plt_swabs, plt_wifi,
ncol = 1
)
if (site == '90U') {
p <- p + patchwork::plot_layout(
heights = c(1.16, 1.14, 0.63)
)
}
needs_axis <- site %in% c('DMS', 'TBT', 'LPR')
if (needs_axis) return(p)
p + patchwork::plot_annotation(theme = theme(
axis.title.y = element_blank(),
axis.text.y = element_blank()
))
}))
#### Combine all 6 sites' plots into 3*2 panel -----
(
figure_3 <-
patchwork::wrap_plots(
fig3_plts$panel,
ncol = 2,
tag_level = 'keep',
guides = 'collect'
)
)# stack summaries
swab_summary |>
mutate(site = 'Overall') |>
bind_rows(swab_summary_sites) |>
relocate(site) |>
set_names(c('Site', 'N', 'PCR-Positive', '%')) |>
kableExtra::kable(
format = 'pipe',
caption = 'Building Summary'
)| Site | N | PCR-Positive | % |
|---|---|---|---|
| Overall | 554 | 72 | 13.0 |
| UO_90U | 98 | 32 | 32.7 |
| UO_DMS | 86 | 5 | 5.8 |
| UO_MRT | 90 | 13 | 14.4 |
| UO_MNT | 98 | 9 | 9.2 |
| UO_TBT | 84 | 4 | 4.8 |
| UO_LPR | 98 | 9 | 9.2 |
kableExtra::kable(
x = corr_ci,
format = 'pipe',
caption = 'Spearman r for biweekly, campus-wide measures'
)| terms | lower | r | upper | p |
|---|---|---|---|---|
| SwPCR-CO2 | -0.4210420 | -0.1586261 | 0.1282930 | 0.2763179 |
| SwPCR-Wi-Fi | -0.4949934 | -0.2390451 | 0.0550748 | 0.1096082 |
| SwPCR-UnvWW | 0.3157817 | 0.5528179 | 0.7249053 | 0.0000559 |
| SwPCR-OttWW | 0.4973249 | 0.6830058 | 0.8088547 | 0.0000001 |
| SwPCR-Cases | 0.5282461 | 0.7434316 | 0.8688837 | 0.0000017 |
| CO2-Wi-Fi | -0.2214541 | 0.0735739 | 0.3562627 | 0.6270176 |
| CO2-UnvWW | -0.3284296 | -0.0455597 | 0.2448100 | 0.7610603 |
| CO2-OttWW | -0.4365919 | -0.1771429 | 0.1095086 | 0.2233583 |
| CO2-Cases | -0.5112380 | -0.1916081 | 0.1745854 | 0.3017934 |
| Wi-Fi-UnvWW | -0.5220947 | -0.2665257 | 0.0329629 | 0.0803214 |
| Wi-Fi-OttWW | -0.5224336 | -0.2736355 | 0.0181003 | 0.0657527 |
| Wi-Fi-Cases | -0.6351605 | -0.3564595 | 0.0043710 | 0.0531745 |
| UnvWW-OttWW | 0.3810857 | 0.6023358 | 0.7583331 | 0.0000075 |
| UnvWW-Cases | 0.0818404 | 0.4294475 | 0.6839052 | 0.0178699 |
| OttWW-Cases | 0.1761573 | 0.4993295 | 0.7253344 | 0.0042398 |
(figure_1) Figure 1: Time-series of (top to bottom) important events at the university, proportion of PCR-positive swabs, biweekly mean ambient CO2 (across collection sites), biweekly mean peak number of Wi-Fi connections (across 5 buildings), biweekly mean waste-water signal (across up to 6 collection sites; relative to PPMoV), biweekly mean regional waste-water detection (relative to PPMoV). Points show biweekly means. Trend lines show the LOESS fit.
figure_2rm(corr_spearman)Figure 2. Spearman correlation between biweekly campus-wide variables: self-reported cases, floor swab positivity (Swab PCR), mean CO2, mean daily peak Wi-Fi connections, aggregate waste-water signal at the university (University WW) and regional level (Ottawa WW),
figure_3
Figure 3. Campus COVID-19 cases, swab results, CO2
concentrations (during swab collection), and daily peak Wi-Fi
connections by building. Cases plots show counts of self-reported for
students and employees separately; several cases are associated with
multiple buildings. Case data collection was abandoned in early February
2022 (dashed line). Swabs and CO plots show results at two locations
within each building, with one sample collected in a high-traffic area
and the other in a low-traffic area. Swab PCR results are expressed as
the number of SARS-CoV-2 RNA copies plus one, on a log-scale. Points
represent the result for a single swab. Wi-fi plots show the peak daily
number of simultaneous connections per building. No Wi-Fi data was
available for the 90U building.
## save figures ----
# figs 1 & 3 don't work as a pdf for some reason
ggsave('fig/figure_1.png', figure_1, device = 'png', width = 7, height = 7)
ggsave('fig/figure_2.pdf', figure_2, device = 'pdf', height = 4, width = 4)
ggsave('fig/figure_2.png', figure_2, device = 'png', height = 4, width = 4)
ggsave('fig/figure_3.png', figure_3, device = 'png', dpi = 300, height = 6, width = 6)figure_sites_mapMap: Where are these collection sites? Pink markers represent waste-water collection sites; purple markers represent buildings where surface testing was performed. Only one building, Tabaret Hall, had both surface and waste-water testing. The locations of two waste-water sampling sites could not be ascertained and are not displayed on the map.
It remains unclear which buildings the ‘South Sampling Site’ and ‘North Sampling Site’ catchment areas are related to (not mapped).
cat(rs_abstract)Over the 32-week study period, we collected 554 swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing. Overall, 13% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between 4.8% and 32.7%. Comment on when spikes in cases, swabs, and waste-water occurred…. Comment on prediction of cases using swabs, ww, or combined approach…
result_p1 <-
lst(
corr_swab_uniww = corr_ci |> filter(terms == 'SwPCR-UnvWW'),
corr_r = corr_swab_uniww |> pluck('r') |> round(2), # 0.7
corr_r_lo = corr_swab_uniww |> pluck('lower') |> round(2),
corr_r_hi = corr_swab_uniww |> pluck('upper') |> round(2),
corr_95ci = str_glue('{corr_r_lo} - {corr_r_hi}')
) |>
glue::glue_data(
"Results
Detection of SARS-CoV-2 from floor swabs and wastewater across campus
From September 2021 to April 2022, we conducted environmental surveillance of SARS-CoV-2 across the University of Ottawa campus using both built environment swabbing and wastewater testing. Floor swabs were collected twice weekly from six buildings on the main campus, including a residence hall, the main library, and lecture and administration buildings. The floors of two sites within each building were sampled, a high-traffic site close to the main entrance, and a low-traffic site at a more remote location within the building. Wastewater was collected from six additional (non-correlated?) sites across the main campus over the study period. SARS-CoV-2 RNA was detected from built environment and wastewater samples according to established protocols (see Methods) (refs).
SARS-CoV-2 prevalence was low during the fall term (September to December 2021) according to both aggregate floor swab positivity and normalized wastewater signal (Fig. 1). However, the winter term (January to April 2022) was characterized by two spikes in environmental SARS-CoV-2 signal in January and late March with an intervening trough. This pattern was present in both the built environment and wastewater results and was driven by a subset of the individual sites sampled, specifically 90U, LPR, and MRT buildings (Fig. 3). We found a significant correlation between swab positivity and wastewater signal (Spearman r = {corr_r}, 95% CI: {corr_95ci}; Figs. 1, 2). The signal increases occurred after the local introduction of the Omicron variant and correspond with two city-wide peaks in COVID-19 prevalence reported in Ottawa wastewater and case data.
Built environment detection of SARS-CoV-2 corresponds to presence of COVID-19 cases")
cat(result_p1)## Results
## Detection of SARS-CoV-2 from floor swabs and wastewater across campus
## From September 2021 to April 2022, we conducted environmental surveillance of SARS-CoV-2 across the University of Ottawa campus using both built environment swabbing and wastewater testing. Floor swabs were collected twice weekly from six buildings on the main campus, including a residence hall, the main library, and lecture and administration buildings. The floors of two sites within each building were sampled, a high-traffic site close to the main entrance, and a low-traffic site at a more remote location within the building. Wastewater was collected from six additional (non-correlated?) sites across the main campus over the study period. SARS-CoV-2 RNA was detected from built environment and wastewater samples according to established protocols (see Methods) (refs).
##
## SARS-CoV-2 prevalence was low during the fall term (September to December 2021) according to both aggregate floor swab positivity and normalized wastewater signal (Fig. 1). However, the winter term (January to April 2022) was characterized by two spikes in environmental SARS-CoV-2 signal in January and late March with an intervening trough. This pattern was present in both the built environment and wastewater results and was driven by a subset of the individual sites sampled, specifically 90U, LPR, and MRT buildings (Fig. 3). We found a significant correlation between swab positivity and wastewater signal (Spearman r = 0.55, 95% CI: 0.32 - 0.72; Figs. 1, 2). The signal increases occurred after the local introduction of the Omicron variant and correspond with two city-wide peaks in COVID-19 prevalence reported in Ottawa wastewater and case data.
## Built environment detection of SARS-CoV-2 corresponds to presence of COVID-19 cases
result_p2 <- lst(
total_n_cases_study_period = total_n_cases_study_period, # 116
) |>
glue::glue_data(
"For most of the study period (September 2021 to February 2022), members of the university community (faculty, students, and staff) testing positive for COVID-19 were required to report whether they had been on campus while possibly infectious. There were {total_n_cases_study_period} reported cases of COVID-19 during the study period and their incidence broadly paralleled the environmental surveillance trends, with low numbers of cases reported in the fall term, and the highest numbers reported in January, before case reporting ceased (Fig. 1). Campus buildings visited by individuals with COVID-19 were also reported during the contact tracing procedure, which allowed us to determine whether detection from floor swabs occurred in buildings recently visited by infected individuals. Indeed, two sites with high positivity in January 2022 at the university residence (90U) and an administrative building (LPR), were associated with clusters of reported cases in students and staff, respectively (Fig. 3). In contrast, a high floor swab positivity in the main library during the same period, was not reflected in the case data. This could be an example of environmental surveillance identifying a signal where cases were under-reported (??)."
)
cat(result_p2)## For most of the study period (September 2021 to February 2022), members of the university community (faculty, students, and staff) testing positive for COVID-19 were required to report whether they had been on campus while possibly infectious. There were 116 reported cases of COVID-19 during the study period and their incidence broadly paralleled the environmental surveillance trends, with low numbers of cases reported in the fall term, and the highest numbers reported in January, before case reporting ceased (Fig. 1). Campus buildings visited by individuals with COVID-19 were also reported during the contact tracing procedure, which allowed us to determine whether detection from floor swabs occurred in buildings recently visited by infected individuals. Indeed, two sites with high positivity in January 2022 at the university residence (90U) and an administrative building (LPR), were associated with clusters of reported cases in students and staff, respectively (Fig. 3). In contrast, a high floor swab positivity in the main library during the same period, was not reflected in the case data. This could be an example of environmental surveillance identifying a signal where cases were under-reported (??).
lst() |>
glue::glue_data(
"Building occupancy measures such as Wi-Fi usage and CO2 levels did not track with case levels or built environment detection
The risk of COVID-19 transmission is increased in congregate settings and in buildings with poor ventilation (ref). Therefore, variation in case levels between buildings might reflect differences in building occupancy or ventilation. We used two proxy measures, CO2 levels and Wi-Fi usage, to estimate building occupancy or (in the case of CO2) poor air ventilation. CO2 was measured concurrently with the floor swabbing at each sampling site, while daily building-level Wi-fi usage was shared by the university. CO2 concentrations were remarkably stable across buildings and time, at levels (500-700 ppm) indicating high indoor air quality. Wi-fi usage, on the other hand, fluctuated during the term, with expected drops during reading week, winter break, and individual building closures (figs. 1, 3). Both measures of building occupancy, however, did not correlate with either campus COVID-19 cases or floor swab detection during the study period (p < 0.05; Fig. 2). Conclusion(?)"
)## Building occupancy measures such as Wi-Fi usage and CO2 levels did not track with case levels or built environment detection
## The risk of COVID-19 transmission is increased in congregate settings and in buildings with poor ventilation (ref). Therefore, variation in case levels between buildings might reflect differences in building occupancy or ventilation. We used two proxy measures, CO2 levels and Wi-Fi usage, to estimate building occupancy or (in the case of CO2) poor air ventilation. CO2 was measured concurrently with the floor swabbing at each sampling site, while daily building-level Wi-fi usage was shared by the university. CO2 concentrations were remarkably stable across buildings and time, at levels (500-700 ppm) indicating high indoor air quality. Wi-fi usage, on the other hand, fluctuated during the term, with expected drops during reading week, winter break, and individual building closures (figs. 1, 3). Both measures of building occupancy, however, did not correlate with either campus COVID-19 cases or floor swab detection during the study period (p < 0.05; Fig. 2). Conclusion(?)
lst() |>
glue::glue_data(
"Modeling of campus-wide cases based on environmental detection
Non-invasive approaches to monitoring SARS-CoV-2 levels such as environmental sampling are valuable when systematic testing of individuals is unfeasible. We evaluated environmental surveillance measures as predictors of the campus-wide case burden using Poisson regression models chosen through backward elimination. Predictors specified in the full models included surface swab positivity (across 6 buildings), on-campus and regional waste-water viral quantities, CO2 concentrations, and Wi-Fi user counts. Backward elimination indicated surface swab positivity and regional waste-water measures as the most significant predictors of cases during the same period (Table S:A). We repeated backwards elimination including interaction terms in the initial model. In the selected model, Wi-Fi (ß = 0.94; 95% CI: 0.28, 1.6), and the interaction between surface swabs and regional waste-water (ß = -0.55; 95% CI: -0.93, -0.20) were identified as significant effects, in addition to the swab positivity and regional waste-water (Table S;B). "
)## Modeling of campus-wide cases based on environmental detection
## Non-invasive approaches to monitoring SARS-CoV-2 levels such as environmental sampling are valuable when systematic testing of individuals is unfeasible. We evaluated environmental surveillance measures as predictors of the campus-wide case burden using Poisson regression models chosen through backward elimination. Predictors specified in the full models included surface swab positivity (across 6 buildings), on-campus and regional waste-water viral quantities, CO2 concentrations, and Wi-Fi user counts. Backward elimination indicated surface swab positivity and regional waste-water measures as the most significant predictors of cases during the same period (Table S:A). We repeated backwards elimination including interaction terms in the initial model. In the selected model, Wi-Fi (ß = 0.94; 95% CI: 0.28, 1.6), and the interaction between surface swabs and regional waste-water (ß = -0.55; 95% CI: -0.93, -0.20) were identified as significant effects, in addition to the swab positivity and regional waste-water (Table S;B).
lst() |>
glue::glue_data(
"We repeated backwards elimination once more, including on-campus waste-water as a predictor in an initial model with only main effects, though few observations were available for analysis (N=10; Table S:C). On-campus waste-water viral signal was indicated as a useful predictor of cases by the model selection procedure; however, the estimated effect size was small and non-significant (ß = 0.14; 95% CI: -0.03, 0.29), compared to the regional waste-water signal (ß = 0.94, 95% CI: 0.36, 1.6). As this model contained few observations, the surface swab positivity term also became non-significant (ß = 0.31, 95% CI: -0.01, 0.62). "
)## We repeated backwards elimination once more, including on-campus waste-water as a predictor in an initial model with only main effects, though few observations were available for analysis (N=10; Table S:C). On-campus waste-water viral signal was indicated as a useful predictor of cases by the model selection procedure; however, the estimated effect size was small and non-significant (ß = 0.14; 95% CI: -0.03, 0.29), compared to the regional waste-water signal (ß = 0.94, 95% CI: 0.36, 1.6). As this model contained few observations, the surface swab positivity term also became non-significant (ß = 0.31, 95% CI: -0.01, 0.62).
lst() |>
glue::glue_data(
"Modeling of per-building environmental detection…
We validated the predictivity of environmental surveillance against the reported case data, using a model that predicts presence of COVID-19 cases in a sampled building, using swab detection as a predictor. [Table – Model performance]. Fig. 4 overlays the model’s predictions with the surface detection and reported cases.
[Model of campus-wide cases – using swabs and wastewater as predictors? ie, complementarity of ww and floor swab approaches.]"
)## Modeling of per-building environmental detection…
## We validated the predictivity of environmental surveillance against the reported case data, using a model that predicts presence of COVID-19 cases in a sampled building, using swab detection as a predictor. [Table – Model performance]. Fig. 4 overlays the model’s predictions with the surface detection and reported cases.
## [Model of campus-wide cases – using swabs and wastewater as predictors? ie, complementarity of ww and floor swab approaches.]
lst() |>
glue::glue_data(
"Surface detection of SARS-CoV-2 was not greater in high-traffic areas
At each building where environmental surveillance by surface swabbing was performed, we selected one high-traffic area where people often travel or congregate and a second low-traffic area for contrast. We hypothesized that the floors in commonly frequented locations would have greater rates of SARS-CoV-2 detection. However, positivity rates were similar across high-traffic (14.3%, N=280) and low-traffic sites (11.7%, N=274). To confirm this, we fit a mixed-effects logistic regression model, with the surface swab result as a binary outcome, the traffic level as a fixed effect, and specified random intercepts for each building to account for the clustering of sites. This model indicated that higher-traffic locations did not have significantly greater positivity rates than low-traffic locations (OR = 1.3, 95% CI: 0.77 - 2.1)."
)## Surface detection of SARS-CoV-2 was not greater in high-traffic areas
## At each building where environmental surveillance by surface swabbing was performed, we selected one high-traffic area where people often travel or congregate and a second low-traffic area for contrast. We hypothesized that the floors in commonly frequented locations would have greater rates of SARS-CoV-2 detection. However, positivity rates were similar across high-traffic (14.3%, N=280) and low-traffic sites (11.7%, N=274). To confirm this, we fit a mixed-effects logistic regression model, with the surface swab result as a binary outcome, the traffic level as a fixed effect, and specified random intercepts for each building to account for the clustering of sites. This model indicated that higher-traffic locations did not have significantly greater positivity rates than low-traffic locations (OR = 1.3, 95% CI: 0.77 - 2.1).
Multiple imputation (mice) has a key assumption that data is MAR. But data is (arguably) not MAR, but because of some dynamics not measured in the data (like the university giving up hope on recording the data in 2022).
Using mice means making the analysis much more
complicated… Generally, you need to model each imputed dataset and then
analyze the pooled results. That gets a lot more complicated when
needing to first aggregate the data, join to a bunch of other data, do
various modelling tasks…. all nested within each iteration.
Instead, I used a simpler ad-hoc method:
Statistical analysis was performed using R version 4.2.2 (2022-10-31; cite Ihaka & Gentleman).
Generalized linear models were created using the glm
function. Mixed models were created using the glmer
function from the package lme4 (Bates et al.).
We fit poisson and negative binomial regression models with the number of campus-wide cases as the outcome; we applied backwards selection to evaluate predictors of campus-wide cases, including the aggregated results (means) of surface swabbing, waste-water testing, CO2 monitors, and Wi-Fi user counts.
We used mixed effects logistic regression to model the presence of SARS-CoV-2 infected individuals as a binary outcome (ie. positive event: one or more cases occurring during a week) in a university building, using surface swab PCR results as predictor with random intercepts for buildings.
Graphics were created with ggplot2 (v3.4.1)
Find our analysis code at our public github repository: https://github.com/CUBE-Ontario/UOttawa-Analysis
sessionInfo()## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] rcartocolor_2.0.0 glue_1.6.2 janitor_2.1.0 ggtext_0.1.2
## [5] patchwork_1.1.2 ggiraph_0.8.6 here_1.0.1 lubridate_1.9.2
## [9] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.0 purrr_1.0.1
## [13] readr_2.1.4 tidyr_1.3.0 tibble_3.1.8 ggplot2_3.4.1
## [17] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.2 uuid_1.1-0 backports_1.4.1
## [4] systemfonts_1.0.4 splines_4.2.2 crosstalk_1.2.0
## [7] listenv_0.8.0 leaflet_2.1.1 digest_0.6.29
## [10] ca_0.71.1 foreach_1.5.2 htmltools_0.5.4
## [13] leaflet.providers_1.9.0 fansi_1.0.3 magrittr_2.0.3
## [16] memoise_2.0.1 tzdb_0.3.0 globals_0.15.0
## [19] extrafont_0.19 vroom_1.6.1 sandwich_3.0-2
## [22] extrafontdb_1.0 svglite_2.1.0 timechange_0.2.0
## [25] gfonts_0.2.0 colorspace_2.0-3 rvest_1.0.3
## [28] textshaping_0.3.6 xfun_0.31 crayon_1.5.1
## [31] jsonlite_1.8.4 AER_1.2-10 lme4_1.1-31
## [34] iterators_1.0.14 survival_3.4-0 zoo_1.8-11
## [37] kableExtra_1.3.4 registry_0.5-1 gtable_0.3.0
## [40] webshot_0.5.4 car_3.1-1 Rttf2pt1_1.3.12
## [43] abind_1.4-5 scales_1.2.0 fontquiver_0.2.1
## [46] Rcpp_1.0.8.3 viridisLite_0.4.0 xtable_1.8-4
## [49] gridtext_0.1.5 bit_4.0.4 Formula_1.2-4
## [52] fontLiberation_0.1.0 htmlwidgets_1.5.4 httr_1.4.5
## [55] ellipsis_0.3.2 pkgconfig_2.0.3 farver_2.1.0
## [58] sass_0.4.1 utf8_1.2.2 crul_1.3
## [61] tidyselect_1.2.0 labeling_0.4.2 rlang_1.0.6
## [64] later_1.3.0 munsell_0.5.0 cellranger_1.1.0
## [67] tools_4.2.2 cachem_1.0.7 cli_3.6.0
## [70] generics_0.1.2 corrr_0.4.4 broom_1.0.3
## [73] evaluate_0.15 fastmap_1.1.0 ragg_1.2.5
## [76] yaml_2.3.5 knitr_1.39 bit64_4.0.5
## [79] visdat_0.6.0 future_1.26.1 nlme_3.1-160
## [82] mime_0.12 xml2_1.3.3 compiler_4.2.2
## [85] rstudioapi_0.14 curl_4.3.2 bslib_0.3.1
## [88] stringi_1.7.6 highr_0.9 gdtools_0.3.1
## [91] hrbrthemes_0.8.0 lattice_0.20-45 Matrix_1.5-1
## [94] commonmark_1.8.1 fontBitstreamVera_0.1.1 psych_2.2.9
## [97] nloptr_2.0.3 markdown_1.4 vctrs_0.5.2
## [100] pillar_1.8.1 lifecycle_1.0.3 furrr_0.3.0
## [103] lmtest_0.9-40 jquerylib_0.1.4 seriation_1.4.0
## [106] httpuv_1.6.9 R6_2.5.1 TSP_1.2-1
## [109] promises_1.2.0.1 renv_0.14.0 parallelly_1.31.1
## [112] codetools_0.2-18 boot_1.3-28 MASS_7.3-58.1
## [115] rprojroot_2.0.3 withr_2.5.0 httpcode_0.3.0
## [118] mnormt_2.1.1 broom.mixed_0.2.9.4 naniar_1.0.0
## [121] mgcv_1.8-41 parallel_4.2.2 hms_1.1.2
## [124] grid_4.2.2 minqa_1.2.4 rmarkdown_2.14
## [127] snakecase_0.11.0 carData_3.0-5 shiny_1.7.4